DOES POPULATION DENSITY AND LAND USE AFFECT THE RATE OF REDUCTION OF FUKUSHIMA DAICHI RADIATIONS?

Part I: Data Loading, Cleaning and Exploring

1.1 Nihomatsu City,70km to Fukushima Daichi

1.11 Required Packages

library("leaflet")
library(readr)
library(dplyr)
library(RColorBrewer)
library(Hmisc)
library(ggplot2)
library(formatR)
1.11 Loading June 2011 and Nov 2013 Fukushima Data and selecting Nihomatsu’s.
fuk2011 <- read.csv("jun_2011_fukushima.csv")
fuk2013 <- read.csv("nov_2013_fukushima.csv")
1.12 Change to machine readeable column names
names(fuk2011) <- c("gridcode", "pref", "city", "gridCenterNorthlat", 
    "gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec", 
    "daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat", 
    "NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong", 
    "SE_nLat", "SE_eLong")
names(fuk2013) <- c("gridcode", "pref", "city", "gridCenterNorthlat", 
    "gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec", 
    "daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat", 
    "NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong", 
    "SE_nLat", "SE_eLong")
dim(fuk2013)
## [1] 119522     18
dim(fuk2011)
## [1] 45273    18
max(fuk2013$AvgAirDoseRate)
## [1] 39
min(fuk2013$AvgAirDoseRate)
## [1] 0.009
max(fuk2011$AvgAirDoseRate)
## [1] 37
min(fuk2011$AvgAirDoseRate)
## [1] 0.008
1.14 Create air dose quantiles that are plot-able,i.e 6 categorical variables.
fuk2011_q <- fuk2011 %>% mutate(dose_quants = cut2(fuk2011$AvgAirDoseRate, 
    cuts = c(0.1, 1, 5, 15, 25, 35, 45), levels.mean = TRUE))
fuk2013_q <- fuk2013 %>% mutate(dose_quants = cut2(fuk2013$AvgAirDoseRate, 
    cuts = c(0.1, 1, 5, 15, 25, 35, 45), levels.mean = TRUE))
  • Visible reduction of Average Air Dose Distribution by half in Nihomatsu.
  • Trouble is knowing the distribution of causes of this reduction?
1.15 Color function
iro <- colorFactor(palette = "YlOrRd", domain = fuk2011_q$dose_quants)
iro2013 <- colorFactor(palette = "YlOrRd", domain = fuk2013_q$dose_quants)
# Link of Daichi
fukulink <- paste(sep = "<br/>", "<br><a href='http://www.tepco.co.jp
                  /en/decommision/index-e.html'>Fukushima Daichi</a></b>", 
    "Source of radiations")
1.16 Fukushima Average Air Dose Rate for 2011
fuk2011_plot <- leaflet() %>% addTiles() %>% addRectangles(data = fuk2011_q, 
    lng1 = ~SW_eLong, lat1 = ~SW_nLat, lng2 = ~NE_eLong, lat2 = ~NE_nLat, 
    color = ~iro(fuk2011_q$dose_quants)) %>% addLegend("bottomright", 
    pal = iro, values = fuk2011_q$dose_quants, title = "AvgAirDoseRate", 
    labFormat = labelFormat(prefix = "µSv/h "), opacity = 1) %>% 
    addPopups(lat = 37.4211, lng = 141.0328, popup = fukulink, 
        options = popupOptions(closeButton = TRUE))
fuk2011_plot


1.17 Fukushima Average Air Dose Rate for 2013
fuk2013_plot <- leaflet() %>%
        addTiles()%>%
        addRectangles(data = fuk2013_q,lng1 = ~SW_eLong, lat1 = ~SW_nLat,
                      lng2 = ~NE_eLong, lat2 = ~NE_nLat,
                      color = ~iro2013(fuk2013_q$dose_quants))%>%
        addLegend("bottomright", pal = iro2013, values = fuk2013_q$dose_quants,
                  title = "AvgAirDoseRate",
                  labFormat = labelFormat(prefix = "µSv/h "),
                  opacity = 1)%>%
        addPopups(lat = 37.4211, lng = 141.0328,popup = fukulink,
                  options = popupOptions(closeButton = TRUE)) 
fuk2013_plot


1.18 Nihommatsu 2011, Counts of Measuring Locations per Air Dose Rate
ggplot(fuk2011_q, aes(daichi_distance,AvgAirDoseRate)) +
        geom_point() +
        geom_smooth(se = FALSE)+
        ggtitle("AvgAirDose against Distance to Daichi Plant")

1.19 Nihommatsu 2011,Counts of Measuring Locations per Air Dose Rate
ggplot(data = fuk2013_q) +
        geom_bar(mapping = aes(x = daichi_distance, fill = dose_quants), width = 1)+
        ggtitle("AvgAirDose Measured Counts against Daichi Distance")

2.10 Air Dose Rate Reduction due to Distance to Fukushima Daichi

\[AvgAirDoseRate = \beta_{0} + \beta_{1} daichi distance + \epsilon_{i}\]

fit1 <- lm(AvgAirDoseRate~daichi_distance, data = fuk2011)
summary(fit1)
## 
## Call:
## lm(formula = AvgAirDoseRate ~ daichi_distance, data = fuk2011)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.226 -0.901 -0.321  0.165 34.291 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.7838704  0.0267272  104.16   <2e-16 ***
## daichi_distance -0.0288916  0.0004048  -71.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.278 on 45271 degrees of freedom
## Multiple R-squared:  0.1012, Adjusted R-squared:  0.1011 
## F-statistic:  5095 on 1 and 45271 DF,  p-value: < 2.2e-16
#Confidence interval
confint(fit1)
##                       2.5 %      97.5 %
## (Intercept)      2.73148462  2.83625613
## daichi_distance -0.02968495 -0.02809831

2.20 Air Dose Rate Reduction due to Population Density and Land Use

\[AvgAirDoseRate = \beta_{0} + \beta_{1} popn density + \beta_{2} human activities + \epsilon_{i}\]

Challenge

The Air Dose Rate measurements were collected from three sources:

  1. Cars and Buses with Sensors: Measure Air of 100m&#0178 from the road
  2. Un manned Auto Helicopter: Measure from 300m above ground
  3. Fixed Sensors: Geographically sparsed

What constitutes Land Use?:

  1. Farming: Short life span crops lead to frequent land tilling
  2. Cleaned Places: Parks, GolfCourses, Gardens are cleaned too and could reduce radiations

Population density and land usage is measured on a 500m² basis

fuk_pop <- read.csv("fuk.csv")
View(fuk_pop)
# Convert mesh gridecode to lat/long
mesh_latlong <- function (mesh, loc = "C")
{
    mesh <- as.character(mesh)

    if (length(grep("^[0-9]{4}", mesh)) == 1) { 
        mesh12 <- as.numeric(substring(mesh, 1, 2))
        mesh34 <- as.numeric(substring(mesh, 3, 4))
        lat_width  <- 2 / 3;
        long_width <- 1;
    }
    else {
        return(NULL)
    }

    if (length(grep("^[0-9]{6}", mesh)) == 1) { 
        mesh5 <- as.numeric(substring(mesh, 5, 5))
        mesh6 <- as.numeric(substring(mesh, 6, 6))
        lat_width  <- lat_width / 8;
        long_width <- long_width / 8;
    }

    if (length(grep("^[0-9]{8}", mesh)) == 1) { 
        mesh7 <- as.numeric(substring(mesh, 7, 7))
        mesh8 <- as.numeric(substring(mesh, 8, 8))
        lat_width  <- lat_width / 10;
        long_width <- long_width / 10;
    }

    lat  <- mesh12 * 2 / 3;          
    long <- mesh34 + 100;

    if (exists("mesh5") && exists("mesh6")) {  
        lat  <- lat  + (mesh5 * 2 / 3) / 8;
        long <- long +  mesh6 / 8;
    }
    if (exists("mesh7") && exists("mesh8")) {  
        lat  <- lat  + (mesh7 * 2 / 3) / 8 / 10;
        long <- long +  mesh8 / 8 / 10;
    }

    if (loc == "C") {   
        lat  <-  lat  + lat_width  / 2;
        long <-  long + long_width / 2;
    }
    if (length(grep("N", loc)) == 1) {  
        lat  <- lat  + lat_width;
    }
    if (length(grep("E", loc) == 1)) {  
        long <- long +long_width;
    }

    lat  <- sprintf("%.8f", lat); 
    long <- sprintf("%.8f", long);

    x <- list(as.numeric(lat), as.numeric(long))
    names(x) <- c("lat", "long")

    return (x)
}
# j <- mesh_latlong(mesh = 564000463 , loc = "C")
# class(j);print(j);length(j)
grides <- fuk_pop[,1]
head(grides);class(grides);length(grides)
## [1] 564000194 564000364 564000382 564000393 564000462 564000463
## [1] "integer"
## [1] 10831
grides[2]
## [1] 564000364
#create the lat/long
mylist <- list()
for (i in 1:length(grides) ){
        lis <- mesh_latlong(mesh = grides[i], loc = "C")
        mylist[[i]] <- lis
}

res <- as.data.frame(mylist)


#test
# df <- data.frame(matrix(unlist(res), nrow=10831, byrow=T))
# df <- ldply (res, data.frame)

library(data.table)
df <- as.data.frame(rbindlist(mylist, fill=TRUE))
df[,"gridcode"] <- grides 
View(df)
# merge gridcode and lat/lon datasets
fuk_ll <- merge(fuk_pop, df, by.x = "gridcode", by.y = "gridcode", all = TRUE)
write.csv(fuk_ll, file = "fuk_ll.csv")
#trial plots
pop_plot <- leaflet() %>%
        addTiles()%>%
        addPolylines(data = fuk_ll,lng = ~long, lat = ~lat)
pop_plot        
#remove duplicate lat/lon
uniq <- unique(x=df[,-3])

uniq_plot <- leaflet() %>%
        addTiles()%>%
        addCircles(data = uniq,lng = ~long, lat = ~lat)
uniq_plot
# Unique, lat or lon only
data <- subset(fuk_ll, !duplicated(fuk_ll[,7]))
dim(data)
## [1] 80  7
View(data)
uniq_plot <- leaflet() %>%
        addTiles()%>%
        addCircles(data = data,lng = ~long, lat = ~lat)
uniq_plot

2.30 Using other machine learning algorithms

The Fukushima Nuclear Radiations is multi dimensional;

  • Three major Isotopes each with;
  • Time series due to half life
  • Magnitude of radiation (Becquerel)
  • Absorbed dose (Sievert (Sv))
  • The above dimensionalities coupled with distance,population density and land use, create a data set that can be run on extensive machine learning algorithms like Support Vector Machines, Random Forest, Recurrent Neural Networks and more.

    end